### Appendix replication of Figures 5, 6, 7 and 8 -- with weights

library(plyr)
library(dplyr)
library(MASS)
library(stargazer)

# Work from the replication directory so the relative paths used below
# (the .RData file and the results/ folder) resolve.
setwd("C:/Users/kevin/Dropbox/Social_Media_Lab/Data_Survey/YouGov_Data/apsr_replication/")

# Restore the saved workspace; the code below expects it to provide `uk`.
load("combined_w_raked_weights.RData")

# All further processing happens on `data`.
data <- uk

## Recode the Twitter frequency use variable to better reflect the underlying
## categories.
## Survey item: "Roughly, how many times per month do you go on Twitter?"
## Codes 1-7 are mapped to 0, 1, 2, 7, 15, 30 and 90; any other value is left
## as-is before the numeric conversion (as.numeric() ignores the leading
## blank in " 15").
data$twitter_freq_inc <- as.numeric(
  mapvalues(
    as.character(data$twitter_freq_inc),
    from = c("7", "6", "5", "4", "3", "2", "1"),
    to = c("90", "30", " 15", "7", "2", "1", "0")
  )
)

### Shift the overall tweet count by 1.0001 so that its log is strictly
### positive (assuming non-negative counts) and the reciprocal is defined.
data$tweets_all_topics <- data$tweets_all_topics + 1.0001

# Reciprocal of the log of the shifted count.
data$all_tweets_log <- 1 / log(data$tweets_all_topics)

#########################################################################
##Loop over all models
#########################################################################



## Covariates appended to the right-hand side of every model formula.
controls <- c("woman", "age", "lowerclass", "profile_education_age",
              "white_british", "married", "newsnight_freq", "religious",
              "internet_freq_inc", "newspaper_type")

## Party labels in their various spellings, plus wave / period tags.
full_parties <- c("Labour", "UKIP", "LibDem", "Tories")
parties <- c("labour", "UKIP", "libdem", "conserv")
t_parties <- c("labo", "ukip", "lide", "tory")
waves <- c("w1", "w2", "w3", "w4")
t_waves <- c("p1", "p2", "p3", "p4")
pid <- c(2, 5, 3, 1)
qs <- c("isis", "unemployment", "immigrants")

## Issue labels. These three vectors are parallel: position j of each one
## refers to the same issue.
##   issues   - label used when recording which issue a model belongs to
##   c_issues - issue tag inside the soft_correct_<issue>_<wave> column names
##   t_issues - issue tag inside the tweet-count column names
issues <- c("EU", "spending", "immigration")
c_issues <- c("EU", "spend", "immigration")
t_issues <- c("eu", "economy", "immigr")

# The model loop indexes all three vectors with the same j, so they must
# have the same length.
stopifnot(
  length(c_issues) == length(issues),
  length(t_issues) == length(issues)
)

## initialize the accumulators that are filled issue by issue
names <- list()

models_full_raw <- list()
models_full <- list()
models_combo <- list()

labo_means <- list()
labo_ses <- list()
ukip_means <- list()
ukip_ses <- list()
lide_means <- list()
lide_ses <- list()
tory_means <- list()
tory_ses <- list()
right_media_means <- list()
right_media_ses <- list()
cent_media_means <- list()
cent_media_ses <- list()
left_media_means <- list()
left_media_ses <- list()

total <- list()
total_relevant_tweets <- list()

n <- list()


## Sum several columns of `df`, looked up by exact name.
## Stops with an explicit message when a column is absent. Columns are added
## left to right in the order given, and NA values propagate into the sum.
sum_data_columns <- function(df, columns) {
  absent <- setdiff(columns, colnames(df))
  if (length(absent) > 0) {
    stop("Column(s) not found in data: ", paste(absent, collapse = ", "),
         call. = FALSE)
  }
  Reduce(`+`, lapply(columns, function(column) df[[column]]))
}

## Column-name prefixes of the periods whose tweets are counted:
## two sub-periods per period for party tweets, one column per period for
## media tweets.
party_periods <- c("p3_1", "p3_2", "p2_1", "p2_2", "p4_1", "p4_2")
media_periods <- c("p3", "p2", "p4")

## The model formulas do not depend on the issue, so build them once.
## They are handed to glm() as character strings.
control_terms <- paste0(controls, collapse = "+")
formula_full <- paste0("outcome ~ baseline + total + twitter_freq_inc  +",
                       control_terms)
formula_combo <- paste0("outcome ~ baseline + media + parties + twitter_freq_inc  +",
                        control_terms)

for (j in seq_along(issues)) {

  issue_tag <- t_issues[j]

  ## define outcome variables: wave-4 value as outcome, wave-1 value as
  ## baseline, both taken in absolute value
  names <- c(names, paste0(" ", issues[j]))
  data$outcome <- abs(
    sum_data_columns(data, paste0("soft_correct_", c_issues[j], "_w4"))
  )
  data$baseline <- abs(
    sum_data_columns(data, paste0("soft_correct_", c_issues[j], "_w1"))
  )

  ## Look at the relevant tweets from each party in the 3 final time periods
  data$tweet_count_labo <- sum_data_columns(
    data, paste0(party_periods, "_", issue_tag, "_labo")
  )
  data$tweet_count_lide <- sum_data_columns(
    data, paste0(party_periods, "_", issue_tag, "_lide")
  )
  data$tweet_count_ukip <- sum_data_columns(
    data, paste0(party_periods, "_", issue_tag, "_ukip")
  )
  data$tweet_count_tory <- sum_data_columns(
    data, paste0(party_periods, "_", issue_tag, "_tory")
  )

  ## Same for media tweets, split by outlet leaning.
  ## NOTE: the per-source logs use an offset of 1.00001 while the aggregate
  ## logs below use 1.0001; both are kept exactly as in the original analysis.
  data$right_media <- sum_data_columns(
    data, paste0(media_periods, "_media_right_", issue_tag)
  )
  data$right_media_log <- log(1.00001 + data$right_media)

  data$left_media <- sum_data_columns(
    data, paste0(media_periods, "_media_left_", issue_tag)
  )
  data$left_media_log <- log(1.00001 + data$left_media)

  data$cent_media <- sum_data_columns(
    data, paste0(media_periods, "_media_cent_", issue_tag)
  )
  data$cent_media_log <- log(1.00001 + data$cent_media)

  ## create aggregate variables (logged, offset so that a count of 0 is defined)
  data$parties <- log(1.0001 +
                        data$tweet_count_lide + data$tweet_count_labo +
                        data$tweet_count_ukip + data$tweet_count_tory)

  data$media <- log(1.0001 + data$cent_media + data$left_media + data$right_media)

  data$total <- log(1.0001 +
                      data$tweet_count_lide + data$tweet_count_labo +
                      data$tweet_count_ukip + data$tweet_count_tory +
                      data$cent_media + data$left_media + data$right_media)

  ## keep the per-issue minimum and the full vector of the logged total
  total <- c(total, min(data$total, na.rm = TRUE))
  total_relevant_tweets[[j]] <- data$total

  ## log the main variables (done after the aggregates above, which need the
  ## raw counts)
  data$tweet_count_tory <- log(1.00001 + data$tweet_count_tory)
  data$tweet_count_labo <- log(1.00001 + data$tweet_count_labo)
  data$tweet_count_lide <- log(1.00001 + data$tweet_count_lide)
  data$tweet_count_ukip <- log(1.00001 + data$tweet_count_ukip)

  ## model specifications: weighted logistic regressions

  # full model on the logged total; keep the raw fit and its summary
  models_full_raw[[j]] <- glm(formula_full, data = data,
                              na.action = na.exclude, family = "binomial",
                              weights = raked_weights)
  models_full[[j]] <- summary(models_full_raw[[j]])

  # combo model: media and party tweets entered separately
  models_combo[[j]] <- summary(glm(formula_combo, data = data,
                                   na.action = na.exclude, family = "binomial",
                                   weights = raked_weights))
}




################################################################## graphing---all tweets 
full_issues <- c("EU", "Spending", "Immigration")

# Axis labels and vertical positions, one per issue model.
names <- c("EU", "Spending", "Immigration")
index <- c(1, 2, 3)

## Coefficient on `total` and its standard error, taken from the summaries
## of the first three full models.
means <- vapply(
  models_full[1:3],
  function(fit) fit$coefficients["total", "Estimate"],
  numeric(1)
)
ses <- vapply(
  models_full[1:3],
  function(fit) fit$coefficients["total", "Std. Error"],
  numeric(1)
)

## Coefficient plot: one row per issue, no title.
## (An earlier version used the title
##  "Weighted Effects of Tweets on Placement Accuracy".)
pdf("results/all_w1w4_place_correct_total_weighted.pdf", width = 8, height = 4)
par(mar = c(4, 8, 4, 4))
plot(
  means, index,
  xlim = c(-.2, .2),
  ylab = "", yaxt = "n",
  xlab = "Logistic regression coefficients on number of relevant tweets",
  main = "", pch = 3
)
axis(2, at = index, labels = names, las = 2)
abline(v = 0)

# Tick marks at +/- 1.64 standard errors ...
for (side in c(1, -1)) {
  points(means + side * 1.64 * ses, index, pch = "|")
}
# ... and a horizontal bar spanning +/- 1.96 standard errors.
for (row in seq_along(means)) {
  lines(means[row] + c(-1.96, 1.96) * ses[row], rep(index[row], 2))
}

dev.off()




################################################################## graphing---all tweets OR
full_issues <- c("EU", "Spending", "Immigration")

# Distribution of the logged total; at this point data$total holds the values
# written by the last iteration of the model loop.
summary(data$total)

library(effects)

## One effect plot per issue model: the effect of `total`, evaluated on the
## grid 0, 1, ..., 10, written to results/<issue>_w1w4_full_OR_weighted.pdf.
for (k in seq_along(full_issues)) {

  eff <- allEffects(models_full_raw[[k]], xlevels = list("total" = seq(0, 10, 1)))

  pdf(paste0("results/", full_issues[k], "_w1w4_full_OR_weighted.pdf"), 8, 4)

  # tryCatch(..., finally = ) closes the PDF device even if plotting fails,
  # so a failed plot does not leave a dangling open device.
  tryCatch({
    par(mar = c(4, 8, 4, 2))

    # NOTE(review): "Probablity" is misspelled in the axis label; it is left
    # untouched so the figures match the published ones.
    drawn <- withVisible(
      plot(eff, 'total', xlab = 'Log of Topical Tweets',
           ylab = 'Weighted Probablity of Correct Ranking W4',
           main = "")
    )
    # A plot object that is returned rather than drawn only renders when
    # printed. Top-level auto-printing does not happen inside a loop or under
    # source(), so print explicitly whenever the value is visible.
    if (drawn$visible) {
      print(drawn$value)
    }
  }, finally = dev.off())
}


################graphing--combined results

full_issues <- c("EU", "Spending", "Immigration")

## Collect, for every combo model, the coefficients on `parties` and `media`
## (in that order) together with their standard errors.
## The result is laid out model by model:
##   parties[1], media[1], parties[2], media[2], ...
combo_terms <- c("parties", "media")

# vapply() yields a 2 x (number of models) matrix; as.vector() flattens it
# column by column, which gives the model-by-model order described above.
all_means <- as.vector(vapply(
  models_combo,
  function(fit) unname(fit$coefficients[combo_terms, "Estimate"]),
  numeric(2)
))

all_ses <- as.vector(vapply(
  models_combo,
  function(fit) unname(fit$coefficients[combo_terms, "Std. Error"]),
  numeric(2)
))




####################################

#Combine into one graph


## Combined coefficient plot: party and media coefficients for all issues,
## one row each, no title.
## (An earlier version used the title
##  "Weighted Effects of Tweets on Party Placement Accuracy".)
pdf("results/all_w1w4_place_correct_combo_weighted.pdf", width = 12, height = 4)
par(mar = c(4, 10, 4, 2))

# Six evenly spaced rows, labelled in the order the coefficients were collected.
index <- seq(.1, .6, length.out = 6)
names <- c("EU: Parties", "EU: Media",
           "Spending: Parties", "Spending: Media",
           "Immigration: Parties", "Immigration: Media")

plot(
  all_means, index,
  xlim = c(-.2, .2), ylim = c(.09, .61),
  ylab = "", yaxt = "n",
  xlab = "Logistic regression coefficients on number of relevant tweets",
  main = "", pch = 3
)
axis(2, at = index, labels = names, las = 2)
abline(v = 0)

# Tick marks at +/- 1.64 standard errors ...
for (side in c(1, -1)) {
  points(all_means + side * 1.64 * all_ses, index, pch = "|")
}
# ... and a horizontal bar spanning +/- 1.96 standard errors.
for (row in seq_along(all_means)) {
  lines(all_means[row] + c(-1.96, 1.96) * all_ses[row], rep(index[row], 2))
}

dev.off()
